# 糗事百科--全站段子抓取--保存到mysql数据库里面
# https://www.qiushidabaike.com/text_318.html  总共是318页

import urllib.request
import re
import pymysql

# Patterns are compiled once, outside the page loop. re.S makes "." match
# newlines as well, so a single match may span several lines of the page.
CONTENT_PATTERN = re.compile(
    '<dl class="main-list">.*?<dd class="content">(.*?)</dd>.*?</dl>', re.S)
USER_PATTERN = re.compile(
    '<span class="title"><a target="_blank" href=".*?">(.*?)</a></span>', re.S)

# Parameterized insert statement; values are bound by the driver.
INSERT_SQL = 'insert into chat(talk,username) values (%s,%s)'


def _clean_text(raw):
    """Return *raw* without newline/tab characters and with both ends stripped."""
    # Plain (non-raw) escapes are required here: r'\n' would only match a
    # literal backslash followed by "n", never an actual line break.
    return raw.replace('\n', '').replace('\t', '').strip()


def _extract_rows(html):
    """Return the ``(talk, username)`` tuples found in one page of HTML.

    Content matches and username matches are paired positionally. ``zip``
    stops at the shorter list, so a page on which the two patterns match a
    different number of times cannot raise ``IndexError``.
    """
    talks = CONTENT_PATTERN.findall(html)
    usernames = USER_PATTERN.findall(html)
    return [(_clean_text(talk), name) for talk, name in zip(talks, usernames)]


# Table definition. "if not exists" lets the script be re-run against a
# database in which the table was already created by an earlier run.
sql = ''' create table if not exists chat(
    id int(11) not null primary key auto_increment,
    talk varchar(500),
    username varchar(100)
)

'''

# Request headers sent with every page fetch (browser-like User-Agent).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'
}

# Create the connection object.
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='root', db='qiushi', charset='utf8')
try:
    # Obtain the cursor object.
    cursor = conn.cursor()
    try:
        # Make sure the target table exists before inserting.
        cursor.execute(sql)

        # Pages are numbered text_1.html through text_318.html.
        for i in range(1, 319):
            url = f'https://www.qiushidabaike.com/text_{i}.html'
            request = urllib.request.Request(url=url, headers=headers)
            # The timeout keeps one stalled connection from hanging the whole
            # run; the with-block closes the HTTP response after reading.
            with urllib.request.urlopen(request, timeout=30) as response:
                data = response.read().decode('utf-8')

            # Store the page: one batched insert and one commit per page.
            rows = _extract_rows(data)
            if rows:
                cursor.executemany(INSERT_SQL, rows)
                conn.commit()
    finally:
        # Close the cursor even if a request or insert failed.
        cursor.close()
finally:
    # Close the connection even if a request or insert failed.
    conn.close()
